1   /*
2    * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4    *
5    * This code is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU General Public License version 2 only, as
7    * published by the Free Software Foundation.
8    *
9    * This code is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12   * version 2 for more details (a copy is included in the LICENSE file that
13   * accompanied this code).
14   *
15   * You should have received a copy of the GNU General Public License version
16   * 2 along with this work; if not, write to the Free Software Foundation,
17   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18   *
19   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20   * or visit www.oracle.com if you need additional information or have any
21   * questions.
22   */
23  
24  /*
25   * @test
26   * @bug 6831794 6229811
27   * @summary Test EUC_TW charset
28   */
29  
30  import java.nio.charset.*;
31  import java.nio.*;
32  import java.util.*;
33  
34  public class TestEUC_TW {
35  
36      static class Time {
37          long t;
38      }
39      static int iteration = 100;
40  
41      static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
42          throws Exception {
43          String csn = cs.name();
44          CharsetDecoder dec = cs.newDecoder();
45          ByteBuffer bbf;
46          CharBuffer cbf;
47          if (testDirect) {
48              bbf = ByteBuffer.allocateDirect(bb.length);
49              cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
50              bbf.put(bb);
51          } else {
52              bbf = ByteBuffer.wrap(bb);
53              cbf = CharBuffer.allocate(bb.length);
54          }
55          CoderResult cr = null;
56          long t1 = System.nanoTime()/1000;
57          for (int i = 0; i < iteration; i++) {
58              bbf.rewind();
59              cbf.clear();
60              dec.reset();
61              cr = dec.decode(bbf, cbf, true);
62          }
63          long t2 = System.nanoTime()/1000;
64          if (t != null)
65          t.t = (t2 - t1)/iteration;
66          if (cr != CoderResult.UNDERFLOW) {
67              System.out.println("DEC-----------------");
68              int pos = bbf.position();
69              System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
70                                cr.toString(), pos,
71                                bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
72              throw new RuntimeException("Decoding err: " + csn);
73          }
74          char[] cc = new char[cbf.position()];
75          cbf.flip(); cbf.get(cc);
76          return cc;
77  
78      }
79  
80      static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
81          throws Exception {
82          CharsetDecoder dec = cs.newDecoder();
83          ByteBuffer bbf;
84          CharBuffer cbf;
85          if (testDirect) {
86              bbf = ByteBuffer.allocateDirect(bb.length);
87              cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
88              bbf.put(bb).flip();
89          } else {
90              bbf = ByteBuffer.wrap(bb);
91              cbf = CharBuffer.allocate(bb.length);
92          }
93          return dec.decode(bbf, cbf, true);
94      }
95  
96      static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
97          throws Exception {
98          ByteBuffer bbf;
99          CharBuffer cbf;
100         CharsetEncoder enc = cs.newEncoder();
101         String csn = cs.name();
102         if (testDirect) {
103             bbf = ByteBuffer.allocateDirect(cc.length * 4);
104             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
105             cbf.put(cc).flip();
106         } else {
107             bbf = ByteBuffer.allocate(cc.length * 4);
108             cbf = CharBuffer.wrap(cc);
109         }
110         CoderResult cr = null;
111         long t1 = System.nanoTime()/1000;
112         for (int i = 0; i < iteration; i++) {
113             cbf.rewind();
114             bbf.clear();
115             enc.reset();
116             cr = enc.encode(cbf, bbf, true);
117         }
118         long t2 = System.nanoTime()/1000;
119         if (t != null)
120         t.t = (t2 - t1)/iteration;
121         if (cr != CoderResult.UNDERFLOW) {
122             System.out.println("ENC-----------------");
123             int pos = cbf.position();
124             System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
125                               cr.toString(), pos, cc[pos]&0xffff);
126             throw new RuntimeException("Encoding err: " + csn);
127         }
128         byte[] bb = new byte[bbf.position()];
129         bbf.flip(); bbf.get(bb);
130         return bb;
131     }
132 
133     static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
134         throws Exception {
135         ByteBuffer bbf;
136         CharBuffer cbf;
137         CharsetEncoder enc = cs.newEncoder();
138         if (testDirect) {
139             bbf = ByteBuffer.allocateDirect(cc.length * 4);
140             cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
141             cbf.put(cc).flip();
142         } else {
143             bbf = ByteBuffer.allocate(cc.length * 4);
144             cbf = CharBuffer.wrap(cc);
145         }
146         return enc.encode(cbf, bbf, true);
147     }
148 
149     static char[] getEUC_TWChars(boolean skipNR) {
150         //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder();
151         CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder();
152         CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder();
153         char[] cc = new char[0x20000];
154         char[] c2 = new char[2];
155         int pos = 0;
156         int i = 0;
157         //bmp
158         for (i = 0; i < 0x10000; i++) {
159             //SKIP these 3 NR codepoints if compared to EUC_TW
160             if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9))
161                 continue;
162             if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) {
163                 System.out.printf("  Err i=%x:  old=%b new=%b%n", i,
164                                   encOLD.canEncode((char)i),
165                                   enc.canEncode((char)i));
166                 throw new RuntimeException("canEncode() err!");
167             }
168 
169             if (enc.canEncode((char)i)) {
170                 cc[pos++] = (char)i;
171             }
172         }
173 
174         //supp
175         CharBuffer cb = CharBuffer.wrap(new char[2]);
176         for (i = 0x20000; i < 0x30000; i++) {
177             Character.toChars(i, c2, 0);
178             cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip();
179 
180             if (encOLD.canEncode(cb) != enc.canEncode(cb)) {
181                 throw new RuntimeException("canEncode() err!");
182             }
183 
184             if (enc.canEncode(cb)) {
185                 //System.out.printf("cp=%x,  (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff);
186                 cc[pos++] = c2[0];
187                 cc[pos++] = c2[1];
188             }
189         }
190 
191         return Arrays.copyOf(cc, pos);
192     }
193 
194     static void checkRoundtrip(Charset cs) throws Exception {
195         char[] cc = getEUC_TWChars(false);
196         System.out.printf("Check roundtrip <%s>...", cs.name());
197         byte[] bb = encode(cc, cs, false, null);
198         char[] ccO = decode(bb, cs, false, null);
199 
200         if (!Arrays.equals(cc, ccO)) {
201             System.out.printf("    non-direct failed");
202         }
203         bb = encode(cc, cs, true, null);
204         ccO = decode(bb, cs, true, null);
205         if (!Arrays.equals(cc, ccO)) {
206             System.out.printf("    (direct) failed");
207         }
208         System.out.println();
209     }
210 
211     static void checkInit(String csn) throws Exception {
212         System.out.printf("Check init <%s>...%n", csn);
213         Charset.forName("Big5");    // load in the ExtendedCharsets
214         long t1 = System.nanoTime()/1000;
215         Charset cs = Charset.forName(csn);
216         long t2 = System.nanoTime()/1000;
217         System.out.printf("    charset     :%d%n", t2 - t1);
218         t1 = System.nanoTime()/1000;
219             cs.newDecoder();
220         t2 = System.nanoTime()/1000;
221         System.out.printf("    new Decoder :%d%n", t2 - t1);
222 
223         t1 = System.nanoTime()/1000;
224             cs.newEncoder();
225         t2 = System.nanoTime()/1000;
226         System.out.printf("    new Encoder :%d%n", t2 - t1);
227     }
228 
229     static void compare(Charset cs1, Charset cs2) throws Exception {
230         char[] cc = getEUC_TWChars(true);
231 
232         String csn1 = cs1.name();
233         String csn2 = cs2.name();
234         System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);
235 
236         Time t1 = new Time();
237         Time t2 = new Time();
238 
239         byte[] bb1 = encode(cc, cs1, false, t1);
240         byte[] bb2 = encode(cc, cs2, false, t2);
241 
242         System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
243                           csn2, csn1,
244                           t2.t, t1.t,
245                           (double)(t2.t)/(t1.t));
246         if (!Arrays.equals(bb1, bb2)) {
247             System.out.printf("        encoding failed%n");
248         }
249 
250         char[] cc2 = decode(bb1, cs2, false, t2);
251         char[] cc1 = decode(bb1, cs1, false, t1);
252         System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
253                           csn2, csn1,
254                           t2.t, t1.t,
255                           (double)(t2.t)/(t1.t));
256         if (!Arrays.equals(cc1, cc2)) {
257             System.out.printf("        decoding failed%n");
258         }
259 
260         bb1 = encode(cc, cs1, true, t1);
261         bb2 = encode(cc, cs2, true, t2);
262 
263         System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
264                           csn2, csn1,
265                           t2.t, t1.t,
266                           (double)(t2.t)/(t1.t));
267 
268         if (!Arrays.equals(bb1, bb2))
269             System.out.printf("        encoding (direct) failed%n");
270 
271         cc1 = decode(bb1, cs1, true, t1);
272         cc2 = decode(bb1, cs2, true, t2);
273         System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
274                           csn2, csn1,
275                           t2.t, t1.t,
276                           (double)(t2.t)/(t1.t));
277         if (!Arrays.equals(cc1, cc2)) {
278             System.out.printf("        decoding (direct) failed%n");
279         }
280     }
281 
282     // The first byte is the length of malformed bytes
283     static byte[][] malformed = {
284         //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
285     };
286 
287     static void checkMalformed(Charset cs) throws Exception {
288         boolean failed = false;
289         String csn = cs.name();
290         System.out.printf("Check malformed <%s>...%n", csn);
291         for (boolean direct: new boolean[] {false, true}) {
292             for (byte[] bins : malformed) {
293                 int mlen = bins[0];
294                 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
295                 CoderResult cr = decodeCR(bin, cs, direct);
296                 String ashex = "";
297                 for (int i = 0; i < bin.length; i++) {
298                     if (i > 0) ashex += " ";
299                         ashex += Integer.toBinaryString((int)bin[i] & 0xff);
300                 }
301                 if (!cr.isMalformed()) {
302                     System.out.printf("        FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
303                     failed = true;
304                 } else if (cr.length() != mlen) {
305                     System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
306                     failed = true;
307                 }
308             }
309         }
310         if (failed)
311             throw new RuntimeException("Check malformed failed " + csn);
312     }
313 
314     static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
315         int inPos = flow[0];
316         int inLen = flow[1];
317         int outPos = flow[2];
318         int outLen = flow[3];
319         int expedInPos = flow[4];
320         int expedOutPos = flow[5];
321         CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
322                                           :CoderResult.OVERFLOW;
323         ByteBuffer bbf;
324         CharBuffer cbf;
325         if (direct) {
326             bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
327             cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
328         } else {
329             bbf = ByteBuffer.allocate(inPos + bytes.length);
330             cbf = CharBuffer.allocate(outPos + outLen);
331         }
332         bbf.position(inPos);
333         bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
334         cbf.position(outPos);
335         dec.reset();
336         CoderResult cr = dec.decode(bbf, cbf, false);
337         if (cr != expedCR ||
338             bbf.position() != expedInPos ||
339             cbf.position() != expedOutPos) {
340             System.out.printf("Expected(direct=%5b): [", direct);
341             for (int i:flow) System.out.print(" " + i);
342             System.out.println("]  CR=" + cr +
343                                ", inPos=" + bbf.position() +
344                                ", outPos=" + cbf.position());
345             return false;
346         }
347         return true;
348     }
349 
350     static void checkUnderOverflow(Charset cs) throws Exception {
351         String csn = cs.name();
352         System.out.printf("Check under/overflow <%s>...%n", csn);
353         CharsetDecoder dec = cs.newDecoder();
354         boolean failed = false;
355         //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
356         //0   1 2   3         7         11
357         byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
358         int    inlen = bytes.length;
359 
360         int MAXOFF = 20;
361         for (int inoff = 0; inoff < MAXOFF; inoff++) {
362             for (int outoff = 0; outoff < MAXOFF; outoff++) {
363         int[][] Flows = {
364             //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
365             //overflow
366             {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
367             {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
368             {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
369             {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
370             {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
371             {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
372             //underflow
373             {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
374             {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
375             {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
376             {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
377             {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
378             {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
379             {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
380             {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
381             {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
382             {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
383             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
384             // 2-byte under/overflow
385             {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
386             {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
387             {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
388             // 4-byte  under/overflow
389             {inoff,  4,     outoff,  2,      inoff + 3,  outoff + 2, 0},
390             {inoff,  5,     outoff,  2,      inoff + 3,  outoff + 2, 0},
391             {inoff,  6,     outoff,  2,      inoff + 3,  outoff + 2, 0},
392             {inoff,  7,     outoff,  2,      inoff + 3,  outoff + 2, 1},
393             {inoff,  7,     outoff,  3,      inoff + 7,  outoff + 3, 0},
394             // 4-byte  under/overflow
395             {inoff,  8,     outoff,  3,      inoff + 7,  outoff + 3, 0},
396             {inoff,  9,     outoff,  3,      inoff + 7,  outoff + 3, 0},
397             {inoff, 10,     outoff,  3,      inoff + 7,  outoff + 3, 0},
398             {inoff, 11,     outoff,  3,      inoff + 7,  outoff + 3, 1},
399             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
400             // 4-byte/supp  under/overflow
401             {inoff, 11,     outoff,  4,      inoff +11,  outoff + 4, 0},
402             {inoff, 12,     outoff,  4,      inoff +11,  outoff + 4, 0},
403             {inoff, 13,     outoff,  4,      inoff +11,  outoff + 4, 0},
404             {inoff, 14,     outoff,  4,      inoff +11,  outoff + 4, 0},
405             {inoff, 15,     outoff,  4,      inoff +11,  outoff + 4, 1},
406             {inoff, 15,     outoff,  5,      inoff +11,  outoff + 4, 1},
407             {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
408         };
409         for (boolean direct: new boolean[] {false, true}) {
410             for (int[] flow: Flows) {
411                 if (!check(dec, bytes, direct, flow))
412                     failed = true;
413             }
414         }}}
415         if (failed)
416             throw new RuntimeException("Check under/overflow failed " + csn);
417     }
418 
419     public static void main(String[] args) throws Exception {
420         // be the first one
421         //checkInit("EUC_TW_OLD");
422         checkInit("EUC_TW");
423         Charset euctw = Charset.forName("EUC_TW");
424         checkRoundtrip(euctw);
425         compare(euctw, new EUC_TW_OLD());
426         checkMalformed(euctw);
427         checkUnderOverflow(euctw);
428     }
429 }